# NOTE(review): machine-specific absolute path; point this at the local copy
# of the replication files before running. All paths below are relative to it.
setwd("d:/PORES Dropbox/Stephen Pettigrew/ranked-choice-voting/replication files/")

# library() stops with an error when a package is missing, whereas require()
# only warns and returns FALSE, letting the script fail later on.
library(tidyverse)
library(rio)
library(sf)

# Precinct demographics. load() restores the objects stored in the .RData
# file; demo.21 is expected to be one of them.
load("final-data/nyc/precinct-demos.RData")
demo <- mutate(demo.21, year = 2021)

# Ballot-id-to-precinct crosswalk, tagged with the election year.
cw.21 <- mutate(
  readRDS("final-data/nyc/ballot-ids-precs~2021-06-22.RDS"),
  year = 2021
)
# Strip every non-digit from the precinct id and keep only the merge keys.
cw <- select(
  mutate(cw.21, prec = gsub("[^0-9]", "", prec.id)),
  year, ballot.id, prec
)

# The errors file is a list holding the per-ballot error records ($errors)
# and a manifest whose $Contest table describes each office.
cvr.21 <- readRDS("final-data/nyc/errors-nyc-2021-06-22.RDS")
cvr.raw <- mutate(cvr.21$errors, year = 2021)
# Keep only the contests whose office name contains "Citywide".
offices <- filter(
  mutate(cvr.21$manifest$Contest, year = 2021),
  grepl("Citywide", office)
)


# Sanity check: every ballot in the error records should appear in the
# ballot-to-precinct crosswalk. The later merge() on ballot.id uses the
# default all = FALSE, so unmatched ballots would be dropped without notice.
# Warn explicitly rather than relying on someone reading the printed value.
ballots.matched <- all(cvr.21$errors$ballot.id %in% cw.21$ballot.id)
if (!ballots.matched) {
  warning("Some ballot ids in cvr.21$errors are missing from cw.21; ",
          "those ballots will be dropped when merging on ballot.id.",
          call. = FALSE)
}
ballots.matched



# Restrict the error records to the citywide contests and attach each
# contest's office name and party from the manifest.
cvr <- merge(
  x = filter(cvr.raw, office.id %in% offices$office.id),
  y = select(offices, office.id, office, party, year),
  by = c("office.id", "year")
)


# Precinct-by-office error rates, joined to precinct demographics.
res <- cvr %>%
  # Omit ballots where the voter did not vote for the office at all.
  filter(bubble.pattern != "") %>%
  # Attach the precinct for each ballot (inner join on year + ballot id).
  merge(cw, by = c("year", "ballot.id")) %>%
  # Flag ballots with at least one of the three error types.
  mutate(any = overrank | overvote | skip) %>%
  group_by(year, office.id, office, party, prec) %>%
  # Share of ballots with each error type, plus the ballot count per cell.
  summarize(
    any = mean(any),
    overrank = mean(overrank),
    overvote = mean(overvote),
    skip = mean(skip),
    votes = n()
  ) %>%
  ungroup() %>%
  merge(demo, by = c("year", "prec"))

# Fit one linear model per office, regressing the precinct-level rate of any
# mis-mark on precinct demographics. do() returns one row per office with the
# fitted lm object stored in the list-column `model`.
mods <- res %>%
  group_by(office) %>%
  do(model = lm(
    any ~ white + black + hisp + over.60 + below.pov + bach + english.very,
    # weight = votes,
    data = .
  )) %>%
  # Shorten the office names for use as table column headers: abbreviate the
  # party prefix, then drop the " Citywide" suffix.
  mutate(
    office = gsub(
      " Citywide", "",
      gsub("REP", "Rep.", gsub("DEM", "Dem.", office))
    )
  )
# Positional reorder of the rows (swaps the first two).
# NOTE(review): assumes exactly four offices in a known order; confirm against
# the data if the set of citywide contests changes.
mods <- mods[c(2, 1, 3, 4), ]


# Print the full summary of each fitted model for inspection.
lapply(mods$model, summary)

# Regression table with one column per office model, headed by the cleaned
# office names. Logical flags are spelled TRUE/FALSE because T and F are
# ordinary variables that can be reassigned.
stargazer::stargazer(mods$model,
          label = "nyc-regs",
          title = "Predictors of mis-marking rates in NYC precincts",
          no.space = TRUE,
          rownames = FALSE,
          # One label per model term, in formula order, intercept last.
          covariate.labels = c("\\% White", "\\% Black", "\\% Hispanic",
                               "\\% Over 60", 
                               "\\% below poverty line",
                               "\\% with Bachelors", "\\% English speakers",
                               "(Intercept)"),
          column.labels = mods$office, 
          dep.var.caption = "",
          dep.var.labels.include = FALSE,
          # Report only the number of observations and R-squared.
          keep.stat = c("n","rsq"),
          star.cutoffs = c(0.05, 0.01, 0.001))
